Getting Started

# Conventional  way to import pandas 
import pandas as pd 

# Check pandas version
pd.__version__

'1.1.2'

# Show version of all packages 
pd.show_versions()

INSTALLED VERSIONS
------------------
commit           : 2a7d3326dee660824a8433ffd01065f8ac37f7d6
python           : 3.7.8.final.0
python-bits      : 64
OS               : Linux
OS-release       : 5.3.0-64-generic
Version          : #58-Ubuntu SMP Fri Jul 10 19:33:51 UTC 2020
machine          : x86_64
processor        : x86_64
byteorder        : little
LC_ALL           : None
LANG             : en_US.UTF-8
LOCALE           : en_US.UTF-8

pandas           : 1.1.2
numpy            : 1.19.2
pytz             : 2020.1
dateutil         : 2.8.1
pip              : 20.2.3
setuptools       : 47.3.1
Cython           : None
pytest           : 5.4.2
hypothesis       : None
sphinx           : 2.4.4
blosc            : None
feather          : None
xlsxwriter       : None
lxml.etree       : 4.5.2
html5lib         : None
pymysql          : None
psycopg2         : None
jinja2           : 2.11.2
IPython          : 7.14.0
pandas_datareader: 0.9.0
bs4              : 4.9.1
bottleneck       : None
fsspec           : None
fastparquet      : None
gcsfs            : None
matplotlib       : 3.3.1
numexpr          : None
odfpy            : None
openpyxl         : None
pandas_gbq       : None
pyarrow          : None
pytables         : None
pyxlsb           : None
s3fs             : None
scipy            : 1.5.2
sqlalchemy       : 1.3.19
tables           : None
tabulate         : None
xarray           : None
xlrd             : None
xlwt             : None
numba            : 0.50.1

Creating Series

# Build a Series from a plain Python list. No index is supplied, so pandas
# attaches the default integer positions as labels.
s1 = pd.Series(data=[3, 6, 9, 12])
s1

0     3
1     6
2     9
3    12
dtype: int64

# Check type 
type(s1)

pandas.core.series.Series

# To see values 
s1.values

array([ 3,  6,  9, 12])

# To see index/keys 
s1.index

RangeIndex(start=0, stop=4, step=1)

# Create a labelled Series: each key of the mapping becomes the index label
# for the value stored under it (insertion order is kept).
s2 = pd.Series({
    "A": 200000,
    "B": 300000,
    "C": 4000000,
    "D": 500000,
})

s2

A     200000
B     300000
C    4000000
D     500000
dtype: int64

s2.values

array([ 200000,  300000, 4000000,  500000])

s2.index

Index(['A', 'B', 'C', 'D'], dtype='object')

# Label-based indexing: look up the single value stored under label 'A'.
# NOTE(review): no output is shown for this lookup -- presumably because both
# expressions share one notebook cell and only the last one is echoed; confirm.
s2['A']

# Boolean indexing: the comparison builds a True/False mask over s2, and only
# the entries whose value exceeds 700000 are kept (just 'C' in the output below).
s2[s2 > 700000]

C    4000000
dtype: int64

Creating DataFrame

# Create a DataFrame from a dict that maps each column name to its list of
# values; all lists have the same length (one entry per row).
data = {
    "Country": ["Belgium", "India", "Brazil"],
    "Capital": ["Brussels", "New Delhi", "Brasília"],
    "Population": [11190846, 1303171035, 207847528],
}

# `columns` spells out which keys to use and in what left-to-right order.
df = pd.DataFrame(
    data=data,
    columns=["Country", "Capital", "Population"],
)

df

	Country	Capital	Population
0	Belgium	Brussels	11190846
1	India	New Delhi	1303171035
2	Brazil	Brasília	207847528

# Check type 
type(df)

pandas.core.frame.DataFrame

# Indexing
df["Country"]

0    Belgium
1      India
2     Brazil
Name: Country, dtype: object

# or 
df.Country

0    Belgium
1      India
2     Brazil
Name: Country, dtype: object

# Boolean indexing, step 1: an element-wise comparison on a column yields a
# boolean Series (a mask) with one True/False entry per row.
df["Population"] > 40_000_000

0    False
1     True
2     True
Name: Population, dtype: bool

df["Country"] == "Belgium"

0     True
1    False
2    False
Name: Country, dtype: bool

# NOTE: the capital is stored as 'Brasília' (accented 'í') in `data`, so this
# unaccented 'Brasilia' matches no row and every entry in the result is False.
df["Capital"] == "Brasilia"

0    False
1    False
2    False
Name: Capital, dtype: bool

About Pandas Reading Data into Pandas